#delimit ;
*session setup - every command from here on is terminated by a semicolon;
*NOTE(review): legacy numeric form of set more, presumably turns the more pause off - confirm;
set more 1;
*close a log left open by an earlier run, ignoring the error if none is open;
capture log close;
*start from an empty dataset and with no previously defined programs;
clear;
program drop _all;
*fixed seed so the runiform() draws used inside triplet are reproducible;
set seed 123;



program define triplet;

*triplet - build a father plus first and second son sample with earnings and sibling counts;
*Positional arguments:;
*  1 = earliest father birth year, 2 = latest father birth year;
*  3 = earliest son birth year,    4 = latest son birth year;
*Files written to the working directory:;
*  temp, temp0, temp2, temp3, temp-earnwide-triplet (intermediate);
*  bob012, bob012pnronly (sampled persons with birth order 0, 1 or 2);
*  earnwide-triplet-father-(1)-(2)-son-(3)-(4) (wide earnings file);
*  all-sibs-father-(1)-(2)-son-(3)-(4) (sibling counts for sampled sons);
*Reads register extracts from hard-coded paths under D:\Data\WorkData\702092;

display "father cohort " `1' " to " `2' " son cohort " `3' " to " `4'; 

*start from birth records of sons: koen equal to 1 (coded as male further down) born in the son cohort;
use "D:\Data\WorkData\702092\stata-data\DATA_2012\fodselsdata", clear;
keep if koen=="1";
gen yob=year(foeddato);
keep if yob>=`3' & yob<=`4';
*attach ftdb2010 records - pnrf, pnrm and madopt are used after this join, presumably supplied by ftdb2010;
joinby pnr using "D:\Data\WorkData\702092\stata-data\DATA_2012\ftdb2010", unmatched(master);
*require both parent identifiers to be present;
keep if pnrf!="" & pnrm!="";
*NOTE(review): meaning of madopt code 1 is not visible here - confirm against the register codebook;
keep if madopt=="1";
rename foeddato dobs;
rename pnr pnrs;
keep pnrs pnrf pnrm dobs;
*the father identifier becomes the join key for the next lookup;
rename pnrf pnr;

*link father;
joinby pnr using "D:\Data\WorkData\702092\stata-data\DATA_2012\fodselsdata", unmatched(master);
gen yob=year(foeddato);
tab koen;
*keep sons whose father is born in the father cohort and is recorded with koen equal to 1;
*fathers without a birth record have missing yob and fail the upper bound, so they drop out here;
keep if yob>=`1' & yob<=`2';
keep if koen=="1";
rename pnr pnrf;
rename foeddato dobf;
keep pnrs pnrf pnrm dobs dobf;

*link mother;
rename pnrm pnr;
joinby pnr using "D:\Data\WorkData\702092\stata-data\DATA_2012\fodselsdata", unmatched(master);
*require a mother birth date and koen equal to 2;
keep if foeddato!=.;
keep if koen=="2";
rename pnr pnrm;
rename foeddato dobm;
keep pnrs pnrf pnrm dobs dobf dobm;

*find father's son1 & son2 with same mother as son1, then drop twins;
*bordermf = rank of the son by birth date within the mother-father pair;
egen familyid=group(pnrm pnrf);
sort familyid dobs;
qby familyid: gen bordermf=_n;
*borderf = rank of the son by birth date among all sampled sons of the father;
sort pnrf dobs;
qby pnrf: gen borderf=_n;
*keep only sons of same mother-father match as first son of the father;
keep if borderf==bordermf;
*multibirth counts sons sharing a family and a birth date - keep singletons only;
egen multibirth=count(dobs), by(familyid dobs);
keep if multibirth==1;
tab bordermf;
*keep first and second sons only;
keep if bordermf<3;
rename bordermf border;
keep pnrs pnrf pnrm dobs dobf dobm border;
order pnrs pnrf pnrm dobs dobf dobm border;
compress;
save temp, replace;

*temp0 - one row per father, with pnr set to his own identifier and border 0;
keep pnrf dobf;
gen pnr=pnrf;
sort pnr;
keep if pnr!=pnr[_n-1];
rename dobf dob;
gen border=0;
save temp0, replace;

*stack sons (border 1 or 2) and fathers (border 0) in one person-level file;
use temp, clear;
rename pnrs pnr;
rename dobs dob;
keep pnr pnrf dob border;
append using temp0;

*drop households without son1;
*border is 0, 1 or 2 here: its sd within father is about 0.71 for (0,1), exactly 1 for (0,1,2), about 1.41 for (0,2);
*so sbo above 1 flags fathers whose only remaining son has border 2;
egen sbo=sd(border), by(pnrf);
drop if sbo>1;
keep pnr pnrf dob border;
save temp2, replace;

*find sons who are themselves fathers;
*gf equal to 2 means the person appears twice - the border 0 row is his appearance as a father;
egen gf=count(border), by(pnr);
tab gf border;
keep if gf==2 & border==0;
keep pnrf;
sort pnrf;
keep if pnrf!=pnrf[_n-1];
*keep only first 2 generations;
*matched rows are the families headed by a sampled son - dropping them leaves the using-only families;
joinby pnrf using temp2, unmatched(both);
drop if _merge==3;
*guard against any remaining duplicate person rows;
sort pnr;
keep if pnr!=pnr[_n-1];
label variable pnr "individual identifier";
label variable pnrf "father at birth identifier";
label variable dob "date of birth";
label variable border "birth order within sample";
keep pnr pnrf dob border;
order pnr pnrf dob border;
save bob012, replace;
keep pnr;
save bob012pnronly, replace;

*collect earnings histories;
*stack the yearly files 1980-2014 - year is filled in for the newly appended rows after each append;
use pnr lonind using "D:\Data\WorkData\702092\stata-data\DATA_2012\idaperson1980", clear;
gen year=1980;
for Y in num 1981/2009:
  append using "D:\Data\WorkData\702092\stata-data\DATA_2012\idapersonY", keep(pnr lonind)
\ replace year=Y if year==.;
*from 2010 on the earnings variable in the yearly files is named corloen;
rename lonind corloen;
for Y in num 2010 2011:
  append using "D:\Data\WorkData\702092\stata-data\corloenY", keep(pnr corloen)
\ replace year=Y if year==.;
for Y in num 2012/2014:
  append using "D:\Data\WorkData\702092\stata-data\2016earn\indY", keep(pnr corloen)
\ replace year=Y if year==.;
rename corloen earn;
*restrict to sampled persons - unmatched rows on either side are discarded;
joinby pnr using bob012pnronly;
*rescale each year by its factor R relative to 6768, which is the factor listed for 2012;
*NOTE(review): the 35 factors are presumably a wage or price index for 1980-2014 - source not visible here;
for Y in num 1980/2014 \ R in num 2396 2677 2948 3152 3350 3507 3636 3782 3953 4142 4251 4353 4445 4500 4590 4686 4785 4890 4980 5104 5253 5377 5507 5622 5687 5790 5900 6001 6205 6287 6432 6609 6768 6821 6860:
  replace earn=earn/R*6768 if year==Y;
*set negative earnings to zero;
recode earn min/0=0;
compress;
keep pnr earn year;
*one row per person with columns earn1980 ... earn2014;
reshape wide earn, i(pnr) j(year);
compress;
*add father id, birth date and sample birth order - persons without any earnings row are not retained;
joinby pnr using bob012;
order pnr pnrf dob border;
*temporary dataset of earnings histories - needs to be purged of siblings not originally sampled;
save temp-earnwide-triplet, replace;


*find all siblings - even outside original sampling frame;
use "D:\Data\WorkData\702092\stata-data\DATA_2012\fodselsdata", clear;
gen byte male=koen=="1";
joinby pnr using "D:\Data\WorkData\702092\stata-data\DATA_2012\ftdb2010";
keep if pnrf!="" & pnrm!="";
keep if madopt=="1";
rename foeddato dob;
rename pnr pnrk;
keep pnrk pnrf pnrm dob male;
rename pnrf pnr;

*link father for selection on his age;
joinby pnr using "D:\Data\WorkData\702092\stata-data\DATA_2012\fodselsdata";
gen yob=year(foeddato);
keep if yob>=`1' & yob<=`2';
keep if koen=="1";
rename pnr pnrf;
rename pnrk pnr;
keep pnr pnrf pnrm dob male;

*count within multiple births;
egen multib=sum(male), by(dob pnrf);
label variable multib "multiple birth boys";
egen multit=count(male), by(dob pnrf);
label variable multit "multiple birth boys+girls";

*order twins randomly;
*u breaks ties in dob in the sorts below - draws depend on the seed set before the program is called;
gen u=runiform();

*population birth orders - total (boys+girls);
sort pnrf dob u;
qby pnrf: gen fbot=_n;
label variable fbot "father birth order total";
qby pnrf: gen fbt=_N;
label variable fbt "father births total";
sort pnrf pnrm dob u;
qby pnrf pnrm: gen fmbot=_n;
label variable fmbot "father-mother birth order total";
qby pnrf pnrm: gen fmbt=_N;
label variable fmbt "father-mother births total";

*population birth orders - boys;
*ranks are computed within each value of male, so the labels below describe rows with male equal to 1;
sort male pnrf dob u;
qby male pnrf: gen fbob=_n;
label variable fbob "father birth order boys";
qby male pnrf: gen fbb=_N;
label variable fbb "father births boys";
sort male pnrf pnrm dob u;
qby male pnrf pnrm: gen fmbob=_n;
label variable fmbob "father-mother birth order boys";
qby male pnrf pnrm: gen fmbb=_N;
label variable fmbb "father-mother births boys";

*relative birth order among brothers;
gen nobsf=fbob-1;
label variable nobsf "number of older brothers with same father";
gen nybsf=fbb-fbob;
label variable nybsf "number of younger brothers with same father";
gen nobsfm=fmbob-1;
label variable nobsfm "number of older brothers with same father-mother";
gen nybsfm=fmbb-fmbob;
label variable nybsfm "number of younger brothers with same father-mother";

*relative birth order among sisters;
*older sisters = overall rank minus rank among boys - younger sisters = all sisters minus older sisters;
gen nossf=fbot-fbob;
label variable nossf "number of older sisters with same father";
gen nyssf=fbt-fbb-(fbot-fbob);
label variable nyssf "number of younger sisters with same father";
gen nossfm=fmbot-fmbob;
label variable nossfm "number of older sisters with same father-mother";
gen nyssfm=fmbt-fmbb-(fmbot-fmbob);
label variable nyssfm "number of younger sisters with same father-mother";

keep pnr pnrf multib multit fbot fbt fmbot fmbt fbob fbb fmbob fmbb nobsf nybsf nobsfm nybsfm nossf nyssf nossfm nyssfm;
save temp3, replace;

*purge earnings file of these inconsistent birth orderings;
use pnr border using temp-earnwide-triplet, clear;
keep if border>0;
*pnrf and the population birth orders come in from temp3;
joinby pnr using temp3;
*sons whose sample birth order differs from their father-mother birth order among boys;
keep if border!=fmbob;
*reduce to the distinct fathers of those sons;
keep pnrf;
sort pnrf;
keep if pnrf!=pnrf[_n-1];
*only pnrf is in memory here, so the join key is spelled out as pnrf instead of the abbreviation pnr;
*matched rows are every member of a flagged family - dropping them keeps the using-only families;
joinby pnrf using temp-earnwide-triplet, unmatched(both);
drop if _merge==3;
drop _merge;
compress;
save earnwide-triplet-father-`1'-`2'-son-`3'-`4', replace;

*keep siblings data only for final earnings sample;
*border above 0 selects the sons - fathers carry border 0;
keep if border>0;
keep pnr;
joinby pnr using temp3;
compress;
save all-sibs-father-`1'-`2'-son-`3'-`4', replace;

end;


*record the run in a text log, overwriting any earlier log with the same name;
log using "triplet-data-creation-20160706", replace text;

*fathers born 1935 to 1969, sons born 1955 to 1989;
triplet 1935 1969 1955 1989;
*alternative run with the father cohort starting in 1930 - currently disabled;
*triplet 1930 1969 1955 1989;

log close;




